In [ ]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "darkgrid" theme (grey plot background) to every figure drawn below.
sns.set_theme(style="darkgrid")
In [ ]:
# Load the per-turn metrics and keep only the rows recorded for the robot.
# The frame is built with a non-inplace chain: `drop` returns a new DataFrame,
# so `df` is an independent object and the column assignments made in this and
# later cells never write into a filtered slice of the raw CSV frame.
SAMPLE_PERIOD = 0.2  # index-to-duration factor; presumably seconds per sample -- TODO confirm

raw_turns = pd.read_csv("all_turns_2.csv")
df = (
    raw_turns[raw_turns['person_robot'] == 'robot']
    .drop(columns=['Unnamed: 0'])  # presumably the index column written by an earlier CSV export
)

# Turn duration = number of index steps spanned by the turn, scaled by SAMPLE_PERIOD.
df['turn_duration'] = SAMPLE_PERIOD * (df['end_idx'].astype('float') - df['start_idx'].astype('float'))

# Summary statistics of every numeric column, one row per column.
df.describe().T
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 107.0 1851.644860 608.431703 407.000000 2102.000000 2105.000000 2107.000000 2111.000000
path_num 107.0 2.261682 0.743988 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 107.0 2.644860 1.864527 1.000000 1.000000 2.000000 3.000000 10.000000
start_idx 107.0 527.757009 425.483695 57.000000 238.000000 420.000000 660.000000 2164.000000
end_idx 107.0 586.364486 426.185857 83.000000 297.500000 489.000000 708.000000 2264.000000
walking_direction_lag 107.0 -4.775701 23.691535 -79.000000 -15.500000 0.000000 7.000000 64.000000
walking_direction_base_corr 107.0 0.065443 0.398870 -0.706492 -0.308203 0.082524 0.401292 0.866582
walking_direction_lagged_corr 107.0 0.472802 0.152860 0.188645 0.367763 0.447397 0.563752 0.955078
walking_direction_dtw 107.0 49.845938 28.586508 4.432664 28.554681 45.706446 60.959196 162.955230
speeds_lag 107.0 -3.093458 17.179701 -69.000000 -12.000000 -1.000000 4.500000 44.000000
speeds_base_corr 107.0 0.144590 0.318698 -0.663930 -0.092252 0.192048 0.391395 0.839497
speeds_lagged_corr 107.0 0.466142 0.135817 0.194506 0.367961 0.460162 0.547553 0.839497
speeds_dtw 107.0 40.684982 19.690040 10.222585 27.259617 35.758098 50.797445 141.492438
mean_distance 107.0 2.407653 1.329329 0.336612 1.462095 2.205431 3.052211 9.579321
mean_speed_difference 107.0 0.406715 0.154904 0.086809 0.308490 0.368687 0.479630 0.915065
mean_walking_direction_difference 107.0 60.972269 18.422779 14.856844 49.219162 61.661211 72.246630 120.316045
mean_pace_asymmetry 107.0 0.443048 0.123088 0.173722 0.358705 0.436978 0.505316 0.804450
turn_duration 107.0 11.721495 6.440600 5.000000 7.000000 11.000000 13.800000 46.200000
In [ ]:
# Distribution of turn durations as a single horizontal box plot,
# drawn on an explicitly created Axes.
fig, ax = plt.subplots()
sns.boxplot(x=df['turn_duration'], ax=ax)
ax.set_title('Box plot of turn duration')
plt.show()
No description has been provided for this image
In [ ]:
# Divide each DTW cost by the turn length expressed in index steps
# (turn_duration / 0.2), producing one `normalized_<metric>` column per DTW metric.
turn_steps = df['turn_duration'] / 0.2
for dtw_column in ('walking_direction_dtw', 'speeds_dtw'):
    df[f'normalized_{dtw_column}'] = df[dtw_column] / turn_steps
In [ ]:
# Add the magnitude of each lag (sign discarded) as an `abs_<lag>` column.
for lag_column in ('walking_direction_lag', 'speeds_lag'):
    df[f'abs_{lag_column}'] = df[lag_column].abs()
In [ ]:
# Columns used by every correlation matrix and distribution plot below.
# Entries that are commented out are deliberately excluded from the analysis.
relevant_features = [
    # --- per-turn summary metrics ---
    "turn_duration",
    "mean_distance",
    "mean_pace_asymmetry",

    # --- walking-direction synchrony metrics ---
    "walking_direction_lag",
    "abs_walking_direction_lag",
    "walking_direction_dtw",
    "normalized_walking_direction_dtw",
    # "walking_direction_base_corr",
    "walking_direction_lagged_corr",
    # "mean_walking_direction_difference",

    # --- speed synchrony metrics ---
    "speeds_lag",
    "abs_speeds_lag",
    "speeds_dtw",
    "normalized_speeds_dtw",
    # "speeds_base_corr",
    "speeds_lagged_corr",
    # "mean_speed_difference",
]
In [ ]:
# Pearson correlation between all selected metrics on the full robot data set.
corr = df[relevant_features].corr(method='pearson', numeric_only=True)

# Cells with |r| < 0.3 are masked so that only the stronger correlations are drawn.
mask = corr.abs() < 0.3

# A 12x10 inch figure keeps the annotated cells legible.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt=".2f", mask=mask, ax=ax)
ax.set_title(f"Metrics Correlation Matrix - Original Data (n={len(df)})")
plt.show()
No description has been provided for this image
In [ ]:
# Flag every turn that shares its participant_id and path_num with another turn
# whose start_idx or end_idx falls inside this turn's [start_idx, end_idx] range.
df['overlapping'] = False
for turn_label, turn in df.iterrows():
    same_run = (df['participant_id'] == turn['participant_id']) & (df['path_num'] == turn['path_num'])
    starts_inside = (df['start_idx'] >= turn['start_idx']) & (df['start_idx'] <= turn['end_idx'])
    ends_inside = (df['end_idx'] >= turn['start_idx']) & (df['end_idx'] <= turn['end_idx'])
    # The turn always matches itself, so more than one hit means another turn overlaps it.
    if (same_run & starts_inside).sum() > 1 or (same_run & ends_inside).sum() > 1:
        df.at[turn_label, 'overlapping'] = True

# NOTE: the overlap filter below is disabled, so `oans` currently holds every row of `df`.
# overlapping_and_not_subset = df[df['overlapping'] == True]
overlapping_and_not_subset = df
oans = overlapping_and_not_subset

# Same masked Pearson heatmap as for the original data, computed on `oans`.
corr_oans = oans[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr_oans.abs() < 0.3
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_oans, annot=True, fmt=".2f", mask=mask, ax=ax)
ax.set_title(f"Metrics Correlation Matrix - Overlapping Data (n={len(oans)})")
plt.show()
No description has been provided for this image
In [ ]:
# Keep only the turns whose lagged correlation exceeds the threshold for BOTH
# the walking-direction signal and the speed signal.
threshold = 0.3
direction_above = df['walking_direction_lagged_corr'] > threshold
speed_above = df['speeds_lagged_corr'] > threshold
filtered_df = df[direction_above & speed_above]

# NOTE: the additional overlap filter is disabled; `filtered_oans` is the whole filtered frame.
# filtered_oans = filtered_df[filtered_df['overlapping'] == True]
filtered_oans = filtered_df

# Masked Pearson heatmap of the filtered turns (cells with |r| < 0.3 are hidden).
corr_filtered_oans = filtered_oans[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr_filtered_oans.abs() < 0.3
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_filtered_oans, annot=True, fmt=".2f", mask=mask, ax=ax)
ax.set_title(f"Metrics Correlation Matrix - Filtered Overlapping Data (n={len(filtered_oans)})")
plt.show()
No description has been provided for this image
In [ ]:
# Summary statistics of the filtered turns, one row per numeric column.
filtered_oans.describe().transpose()
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 84.0 1862.857143 597.721379 407.000000 2102.000000 2105.000000 2107.000000 2111.000000
path_num 84.0 2.190476 0.752125 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 84.0 2.416667 1.592212 1.000000 1.000000 2.000000 3.000000 8.000000
start_idx 84.0 501.000000 382.805056 57.000000 230.250000 403.000000 658.000000 1720.000000
end_idx 84.0 557.083333 379.685070 83.000000 292.250000 456.000000 700.250000 1800.000000
walking_direction_lag 84.0 -5.535714 22.021045 -79.000000 -14.750000 0.000000 7.000000 43.000000
walking_direction_base_corr 84.0 0.091925 0.409362 -0.706492 -0.261322 0.108055 0.417556 0.866582
walking_direction_lagged_corr 84.0 0.501833 0.145125 0.303473 0.390332 0.482601 0.599382 0.955078
walking_direction_dtw 84.0 47.606303 27.863207 4.432664 27.602502 43.471100 58.967774 162.955230
speeds_lag 84.0 -3.761905 13.922868 -48.000000 -11.250000 -1.000000 3.000000 32.000000
speeds_base_corr 84.0 0.182460 0.323466 -0.663930 -0.058960 0.214539 0.435689 0.839497
speeds_lagged_corr 84.0 0.497610 0.125933 0.301195 0.394321 0.478842 0.568946 0.839497
speeds_dtw 84.0 38.275962 16.284959 10.222585 27.005187 34.873133 47.807880 89.189543
mean_distance 84.0 2.472509 1.403922 0.471139 1.469126 2.213552 3.249714 9.579321
mean_speed_difference 84.0 0.391795 0.149112 0.103170 0.309907 0.359158 0.441056 0.915065
mean_walking_direction_difference 84.0 59.442886 17.176702 14.856844 47.398511 61.236454 71.112776 102.815835
mean_pace_asymmetry 84.0 0.430764 0.112938 0.209666 0.347402 0.410259 0.479218 0.778551
turn_duration 84.0 11.216667 5.224706 5.000000 7.300000 10.300000 13.600000 34.200000
normalized_walking_direction_dtw 84.0 0.851407 0.307541 0.152850 0.665227 0.814061 0.973838 1.683822
normalized_speeds_dtw 84.0 0.714920 0.198747 0.328462 0.574489 0.707928 0.812527 1.395960
abs_walking_direction_lag 84.0 15.630952 16.391138 0.000000 3.000000 10.000000 23.250000 79.000000
abs_speeds_lag 84.0 9.571429 10.744890 0.000000 2.000000 6.000000 13.000000 48.000000
In [ ]:
from scipy.stats import pearsonr

# For every metric, draw one scatter + regression panel per other metric that is
# correlated with it (|r| > 0.3 on the filtered turns), three panels per row.
for feature in relevant_features:
    # Skip pairs whose names share a prefix or suffix (e.g. a metric and its
    # `abs_` / `normalized_` variant) and pairs below the correlation cut-off.
    to_display = [
        feature2
        for feature2 in relevant_features
        if not feature.startswith(feature2)
        and not feature2.startswith(feature)
        and not feature.endswith(feature2)
        and not feature2.endswith(feature)
        and np.abs(corr_filtered_oans.loc[feature, feature2]) > 0.3
    ]
    if not to_display:
        continue

    n_cols = min(len(to_display), 3)
    n_rows = int(np.ceil(len(to_display) / 3))
    # squeeze=False keeps `axs` two-dimensional for every grid size.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)

    for i, feature2 in enumerate(to_display):
        # Row-major placement. The previous `int(np.ceil(i/3))-1` row index sent
        # panel 0 to the last row and shifted the first column by one row.
        ax = axs[i // 3, i % 3]

        peares = pearsonr(filtered_oans[feature], filtered_oans[feature2], alternative='two-sided')
        p_val = peares.pvalue
        CI = peares.confidence_interval(confidence_level=0.95)

        # Scatter plot
        sns.scatterplot(x=feature, y=feature2, data=filtered_oans, ax=ax)
        # Regression line
        sns.regplot(x=feature, y=feature2, data=filtered_oans, scatter=False, line_kws={'color': 'red'}, ax=ax)
        ax.set_title(f"compared with {feature2}\ncorr: {round(corr_filtered_oans.loc[feature, feature2], 3)}, p_val: {round(p_val,5)}, CI: {[round(c,3) for c in CI]}", fontweight='bold')

    # Hide the panels of the last row that received no plot.
    for unused_ax in axs.flat[len(to_display):]:
        unused_ax.set_visible(False)

    fig.suptitle(f"{feature}'s correlations", fontweight='bold')
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
from PIL import Image
import seaborn as sns
from scipy import stats
# For each feature, show the pre-rendered plots of the turn with the highest
# value and of the turn with the lowest value of that feature.

# Columns that identify a turn; they also form the directory of its images.
_TURN_ID_COLUMNS = ['participant_id', 'person_robot', 'path_num', 'turn_num']
# Image files displayed for a turn, left to right.
_TURN_IMAGE_FILES = ("paths.png", "distance.png", "walking_directions.png", "speeds.png")


def _show_extreme_turn(data, feature, highest):
    """Display the four stored images of the turn with the extreme `feature` value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the `_TURN_ID_COLUMNS` and the `feature` column.
    feature : str
        Column whose extreme row is looked up.
    highest : bool
        True selects the largest value, False the smallest.

    Raises
    ------
    FileNotFoundError
        If one of the expected image files does not exist under ./turns/.
    """
    values = data[feature]
    extreme_index = (values.nlargest(1) if highest else values.nsmallest(1)).index
    extreme = data.loc[extreme_index, _TURN_ID_COLUMNS + [feature]]

    base_path = (
        f"./turns/{extreme['participant_id'].values[0]}/{extreme['person_robot'].values[0]}"
        f"/run_{extreme['path_num'].values[0]}/turn_{extreme['turn_num'].values[0]}/"
    )

    fig, axs = plt.subplots(1, 4, figsize=(20, 5))
    for ax, file_name in zip(axs, _TURN_IMAGE_FILES):
        # imshow converts the image to an array immediately, so the file handle
        # can be released as soon as the call returns.
        with Image.open(base_path + file_name) as img:
            ax.imshow(img)
        ax.axis('off')

    # Flatten {column: {index: value}} of the single selected row to {column: value},
    # rounding floats for display.
    to_print_dict = {
        column: round(value, 3) if isinstance(value, float) else value
        for column, by_index in extreme.to_dict().items()
        for value in by_index.values()
    }
    to_print_str = ", ".join([f"{k}: {v}" for k, v in to_print_dict.items()])

    label = "highest" if highest else "lowest"
    fig.suptitle(f"{feature} - {label} value\n {to_print_str}", fontweight='bold')
    plt.tight_layout()
    plt.show()


for feature in relevant_features:
    _show_extreme_turn(filtered_oans, feature, highest=True)
    _show_extreme_turn(filtered_oans, feature, highest=False)
    print("\n\n")
No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


In [ ]:
# Histogram (with KDE overlay) of every selected metric on the filtered turns,
# laid out three panels per row.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
# squeeze=False keeps `axs` two-dimensional for every grid size.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

for i, feature in enumerate(relevant_features):
    # Row-major placement. The previous `int(np.ceil(i/3))-1` row index drew
    # the first feature in the last row and shifted the first column by one row.
    ax = axs[i // 3, i % 3]
    sns.histplot(data=filtered_oans, x=feature, kde=True, ax=ax)
    ax.set_title(feature)
    ax.set_xlabel('')
    ax.set_ylabel('')

# Hide the panels of the last row that received no plot.
for unused_ax in axs.flat[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
import scipy.stats as stats

# Normal probability (Q-Q) plot of every selected metric on the filtered turns,
# laid out three panels per row.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
# squeeze=False keeps `axs` two-dimensional for every grid size.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

for i, feature in enumerate(relevant_features):
    # Row-major placement. The previous `int(np.ceil(i/3))-1` row index drew
    # the first feature in the last row and shifted the first column by one row.
    ax = axs[i // 3, i % 3]
    stats.probplot(filtered_oans[feature], dist="norm", plot=ax)
    # Replace the default title and labels set by probplot.
    ax.set_title(feature)
    ax.set_xlabel('Theoretical Quantiles')
    ax.set_ylabel('Ordered Values')

# Hide the panels of the last row that received no plot.
for unused_ax in axs.flat[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Box plot of every selected metric on the filtered turns, three panels per row.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
# squeeze=False keeps `axs` two-dimensional for every grid size.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

for i, feature in enumerate(relevant_features):
    # Row-major placement. The previous `int(np.ceil(i/3))-1` row index drew
    # the first feature in the last row and shifted the first column by one row.
    ax = axs[i // 3, i % 3]
    sns.boxplot(data=filtered_oans, y=feature, ax=ax)
    ax.set_title(feature)
    ax.set_xlabel('')
    ax.set_ylabel('')

# Hide the panels of the last row that received no plot.
for unused_ax in axs.flat[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image